<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
  <!-- Document metadata -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>beyondml.tflow.utils.transformer &mdash; BeyondML  documentation</title>

  <!-- Stylesheets (text/css is the default type for rel="stylesheet") -->
  <link rel="stylesheet" href="../../../../_static/pygments.css">
  <link rel="stylesheet" href="../../../../_static/css/theme.css">

  <!-- Conditional comment: only Internet Explorer versions below 9 load this script -->
  <!--[if lt IE 9]>
    <script src="../../../../_static/js/html5shiv.min.js"></script>
  <![endif]-->

  <!-- Classic blocking scripts; they execute in the order listed -->
  <script src="../../../../_static/jquery.js"></script>
  <script src="../../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
  <script id="documentation_options" data-url_root="../../../../" src="../../../../_static/documentation_options.js"></script>
  <script src="../../../../_static/doctools.js"></script>
  <script src="../../../../_static/sphinx_highlight.js"></script>
  <script src="../../../../_static/js/theme.js"></script>

  <!-- Links to the site-wide index and search pages -->
  <link rel="index" href="../../../../genindex.html" title="Index">
  <link rel="search" href="../../../../search.html" title="Search">
</head>

<body class="wy-body-for-nav"> 
  <div class="wy-grid-for-nav">
    <!-- Sidebar: home link, search form and the top-level table of contents -->
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >
          <a href="../../../../index.html" class="icon icon-home">
            BeyondML
          </a>
          <!-- GET form that submits the query as "q" (plus two fixed hidden fields) to search.html -->
          <div role="search">
            <form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get">
              <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
              <input type="hidden" name="check_keywords" value="yes" />
              <input type="hidden" name="area" value="default" />
            </form>
          </div>
        </div>
        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
          <!-- role="heading" requires aria-level (WAI-ARIA 1.2); level 2 is the value implied when the attribute is absent, so the exposed level does not change -->
          <p class="caption" role="heading" aria-level="2"><span class="caption-text">Documentation:</span></p>
          <ul>
            <li class="toctree-l1"><a class="reference internal" href="../../../../modules.html">beyondml</a></li>
          </ul>
        </div>
      </div>
    </nav>

    <!-- Content wrapper. The nav that opens on the same line is the top bar labelled "Mobile navigation menu": an icon element plus a link to the documentation index. The data-toggle attributes are presumably hooks for the theme script; confirm against theme.js. -->
    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="../../../../index.html">BeyondML</a>
      </nav>

      <div class="wy-nav-content">
        <div class="rst-content">
          <!-- Breadcrumb trail: home icon, then "Module code", then the current module -->
          <div role="navigation" aria-label="Page navigation">
            <ul class="wy-breadcrumbs">
              <li><a class="icon icon-home" href="../../../../index.html" aria-label="Home"></a></li>
              <li class="breadcrumb-item"><a href="../../../index.html">Module code</a></li>
              <li class="breadcrumb-item active">beyondml.tflow.utils.transformer</li>
              <li class="wy-breadcrumbs-aside">
              </li>
            </ul>
            <hr>
          </div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
             
  <h1>Source code for beyondml.tflow.utils.transformer</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">tensorflow</span> <span class="k">as</span> <span class="nn">tf</span>
<span class="kn">from</span> <span class="nn">beyondml.tflow.layers</span> <span class="kn">import</span> <span class="n">MultiDense</span><span class="p">,</span> <span class="n">SelectorLayer</span>


<div class="viewcode-block" id="build_transformer_block"><a class="viewcode-back" href="../../../../beyondml.tflow.utils.html#beyondml.tflow.utils.transformer.build_transformer_block">[docs]</a><span class="k">def</span> <span class="nf">build_transformer_block</span><span class="p">(</span>
        <span class="n">input_shape</span><span class="p">,</span>
        <span class="n">embed_dim</span><span class="p">,</span>
        <span class="n">num_heads</span><span class="p">,</span>
        <span class="n">neurons</span><span class="p">,</span>
        <span class="n">dropout_rate</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span>
<span class="p">):</span>
<span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd">    Build a Transformer Block</span>

<span class="sd">    Parameters</span>
<span class="sd">    ----------</span>
<span class="sd">    input_shape : int or tuple of int</span>
<span class="sd">        The input shape for the model to use</span>
<span class="sd">    embed_dim : int</span>
<span class="sd">        The dimension of the embedding</span>
<span class="sd">    num_heads : int</span>
<span class="sd">        The number of attention heads to use</span>
<span class="sd">    neurons : int</span>
<span class="sd">        The number of hidden neurons to use in the hidden layer</span>
<span class="sd">    dropout_rate : float (default 0.1)</span>
<span class="sd">        Rate at which dropout is applied</span>
<span class="sd">    value_dim : int or None (default None)</span>
<span class="sd">        The dimension to use for the `value` matrix, if provided</span>

<span class="sd">    Returns</span>
<span class="sd">    -------</span>
<span class="sd">    transformer_block : TensorFlow keras Functional model</span>
<span class="sd">        The transformer block, which can then be used alone or as</span>
<span class="sd">        a layer in another model</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="n">input_layer</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">Input</span><span class="p">(</span><span class="n">input_shape</span><span class="p">)</span>
    <span class="n">query</span> <span class="o">=</span> <span class="n">MultiDense</span><span class="p">(</span><span class="n">embed_dim</span><span class="p">)([</span><span class="n">input_layer</span><span class="p">]</span> <span class="o">*</span> <span class="n">num_heads</span><span class="p">)</span>
    <span class="n">key</span> <span class="o">=</span> <span class="n">MultiDense</span><span class="p">(</span><span class="n">embed_dim</span><span class="p">)([</span><span class="n">input_layer</span><span class="p">]</span> <span class="o">*</span> <span class="n">num_heads</span><span class="p">)</span>
    <span class="n">value</span> <span class="o">=</span> <span class="n">MultiDense</span><span class="p">(</span><span class="n">embed_dim</span><span class="p">)([</span><span class="n">input_layer</span><span class="p">]</span> <span class="o">*</span> <span class="n">num_heads</span><span class="p">)</span>

    <span class="n">query_selectors</span> <span class="o">=</span> <span class="p">[</span>
        <span class="n">SelectorLayer</span><span class="p">(</span><span class="n">i</span><span class="p">)(</span><span class="n">query</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">num_heads</span><span class="p">)</span>
    <span class="p">]</span>
    <span class="n">key_selectors</span> <span class="o">=</span> <span class="p">[</span>
        <span class="n">SelectorLayer</span><span class="p">(</span><span class="n">i</span><span class="p">)(</span><span class="n">key</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">num_heads</span><span class="p">)</span>
    <span class="p">]</span>
    <span class="n">value_selectors</span> <span class="o">=</span> <span class="p">[</span>
        <span class="n">SelectorLayer</span><span class="p">(</span><span class="n">i</span><span class="p">)(</span><span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">num_heads</span><span class="p">)</span>
    <span class="p">]</span>
    <span class="n">attention_layers</span> <span class="o">=</span> <span class="p">[</span>
        <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">Attention</span><span class="p">()([</span><span class="n">query_selectors</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">key_selectors</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">value_selectors</span><span class="p">[</span><span class="n">i</span><span class="p">]])</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">num_heads</span><span class="p">)</span>
    <span class="p">]</span>
    <span class="n">concat</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">Concatenate</span><span class="p">()(</span><span class="n">attention_layers</span><span class="p">)</span>
    <span class="n">merge</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">Reshape</span><span class="p">((</span><span class="n">input_shape</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="o">-</span><span class="mi">1</span><span class="p">))(</span><span class="n">concat</span><span class="p">)</span>

    <span class="n">x</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(</span><span class="n">dropout_rate</span><span class="p">)(</span><span class="n">merge</span><span class="p">)</span>
    <span class="n">out1</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">LayerNormalization</span><span class="p">(</span><span class="n">epsilon</span><span class="o">=</span><span class="mf">1e-6</span><span class="p">)(</span><span class="n">x</span><span class="p">)</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="n">neurons</span><span class="p">,</span> <span class="n">activation</span><span class="o">=</span><span class="s1">&#39;relu&#39;</span><span class="p">)(</span><span class="n">out1</span><span class="p">)</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="n">embed_dim</span> <span class="o">*</span> <span class="n">num_heads</span><span class="p">)(</span><span class="n">x</span><span class="p">)</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(</span><span class="n">dropout_rate</span><span class="p">)(</span><span class="n">x</span><span class="p">)</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">Add</span><span class="p">()([</span><span class="n">out1</span><span class="p">,</span> <span class="n">x</span><span class="p">])</span>
    <span class="n">output_layer</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">LayerNormalization</span><span class="p">(</span><span class="n">epsilon</span><span class="o">=</span><span class="mf">1e-6</span><span class="p">)(</span><span class="n">x</span><span class="p">)</span>

    <span class="k">return</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">models</span><span class="o">.</span><span class="n">Model</span><span class="p">(</span><span class="n">input_layer</span><span class="p">,</span> <span class="n">output_layer</span><span class="p">)</span></div>


<div class="viewcode-block" id="build_token_position_embedding_block"><a class="viewcode-back" href="../../../../beyondml.tflow.utils.html#beyondml.tflow.utils.transformer.build_token_position_embedding_block">[docs]</a><span class="k">def</span> <span class="nf">build_token_position_embedding_block</span><span class="p">(</span>
    <span class="n">sequence_length</span><span class="p">,</span>
    <span class="n">vocab_size</span><span class="p">,</span>
    <span class="n">embed_dim</span>
<span class="p">):</span>
<span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd">    Builds a token and position embedding block</span>

<span class="sd">    Parameters</span>
<span class="sd">    ----------</span>
<span class="sd">    sequence_length : int</span>
<span class="sd">        The length of each sequence</span>
<span class="sd">    vocab_size : int</span>
<span class="sd">        The size of the vocabulary used</span>
<span class="sd">    embed_dim : int</span>
<span class="sd">        The desired embedding dimension</span>

<span class="sd">    Returns</span>
<span class="sd">    -------</span>
<span class="sd">    embedding_block : TensorFlow keras Functional model</span>
<span class="sd">        The embedding block, which can be used alone or</span>
<span class="sd">        as a layer in another model</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="n">tok_input</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">Input</span><span class="p">(</span><span class="n">sequence_length</span><span class="p">)</span>
    <span class="n">pos_input</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">Input</span><span class="p">(</span><span class="n">sequence_length</span><span class="p">)</span>

    <span class="n">tok_embed</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">Embedding</span><span class="p">(</span>
        <span class="n">vocab_size</span><span class="p">,</span> <span class="n">output_dim</span><span class="o">=</span><span class="n">embed_dim</span><span class="p">)(</span><span class="n">tok_input</span><span class="p">)</span>
    <span class="n">pos_embed</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">Embedding</span><span class="p">(</span>
        <span class="n">sequence_length</span><span class="p">,</span> <span class="n">output_dim</span><span class="o">=</span><span class="n">embed_dim</span><span class="p">)(</span><span class="n">pos_input</span><span class="p">)</span>
    <span class="n">output_layer</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">Add</span><span class="p">()([</span><span class="n">tok_embed</span><span class="p">,</span> <span class="n">pos_embed</span><span class="p">])</span>

    <span class="k">return</span> <span class="n">tf</span><span class="o">.</span><span class="n">keras</span><span class="o">.</span><span class="n">models</span><span class="o">.</span><span class="n">Model</span><span class="p">([</span><span class="n">tok_input</span><span class="p">,</span> <span class="n">pos_input</span><span class="p">],</span> <span class="n">output_layer</span><span class="p">)</span></div>
</pre></div>

           </div>
          </div>
          <!-- Page footer: copyright notice followed by the build credits -->
          <footer>
            <hr>
            <div role="contentinfo">
              <p>&#169; Copyright 2023, BeyondML Labs.</p>
            </div>
            Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
            using a <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
            provided by <a href="https://readthedocs.org">Read the Docs</a>.
          </footer>
        </div>
      </div>
    </section>
  </div>
  <script>
      // Registered as a jQuery ready callback, so it runs once the DOM is ready.
      // It calls Navigation.enable(true) on the SphinxRtdTheme global; that object
      // is defined outside this file (presumably by theme.js loaded in the head),
      // so the meaning of the `true` flag should be confirmed there.
      jQuery(function enableThemeNavigation() {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>

</body>
</html>